%load_ext autoreload
%autoreload 2
from scip_workflows.common import *
from scip_workflows.core import plot_gate_czi
import flowutils
from matplotlib.collections import PatchCollection
from sklearn.preprocessing import robust_scale, scale
from scip.features import texture
from aicsimageio import AICSImage
try:
features = snakemake.input[0]
output_columns = snakemake.output.columns
output_index = snakemake.output.index
output_aspect = snakemake.output.aspect
output_eccentricity = snakemake.output.eccentricity
output_ecc_vs_aspect = snakemake.output.ecc_vs_aspect
except NameError:
# data_dir = Path("/data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/results/scip/202203221745/")
data_dir = Path("/home/maximl/scratch/data/vsc/datasets/cd7/800/scip/061020221736/")
output_index = data_dir / "indices" / "index.npy"
output_columns = data_dir / "indices" / "columns.npy"
output_ecc_vs_aspect = data_dir / "figures" / "CD7_ecc_versus_aspect.png"
output_aspect = data_dir / "figures" / "CD7_qc_major_minor_bf.png"
output_eccentricity = data_dir / "figures" / "CD7_qc_ecc_bf.png"
features = data_dir / "features.parquet"
df = pq.read_table(features).to_pandas()
df = df.set_index(["meta_panel", "meta_replicate", "meta_P", "meta_id"])
df = df.sort_index()
df.shape
(41194, 1188)
seaborn.countplot(data=df.reset_index(), y="meta_panel", hue="meta_replicate")
<AxesSubplot:xlabel='count', ylabel='meta_panel'>
# for now only continue with objects from panel D
df = df.loc["D"]
df.shape
(41194, 1188)
# show all NaN columns
df.columns[df.isna().all(axis=0)]
Index([], dtype='object')
df["meta_loc_r"] = (
df["meta_bbox_minr"] + (df["meta_bbox_maxr"] - df["meta_bbox_minr"]) / 2
)
df["meta_loc_c"] = (
df["meta_bbox_minc"] + (df["meta_bbox_maxc"] - df["meta_bbox_minc"]) / 2
)
border_margin = 30
w, h = 1144, 1144
def is_outside_border(r):
if (r.meta_loc_r - border_margin < 0) or (r.meta_loc_r + border_margin > h):
return False
if (r.meta_loc_c - border_margin < 0) or (r.meta_loc_c + border_margin > w):
return False
return True
df["meta_out_border"] = df.apply(is_outside_border, axis="columns")
def draw_tile(data, x, y, *args, channel=0, **kwargs):
ax = seaborn.scatterplot(data=data, x=x, y=y, **kwargs)
p, rep = data["meta_P"].iloc[0], data["meta_replicate"].iloc[0]
im.set_scene(f"P{p}-D{rep}")
ax.imshow(
numpy.max(im.get_image_data("ZXY", C=channel), axis=0)
/ correction_images[f"D{rep}"][channel],
origin="lower",
cmap="viridis",
)
ax.set_axis_off()
print(f"{p}-{rep}", end=" ")
# DAPI
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)
grid = seaborn.FacetGrid(
data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True
)
grid.map_dataframe(
draw_tile,
y="meta_loc_r",
x="meta_loc_c",
hue="meta_out_border",
s=6,
edgecolors="none",
)
for ax in grid.axes.ravel():
ax.set_axis_off()
# CD45 - EGFP
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)
grid = seaborn.FacetGrid(
data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True
)
grid.map_dataframe(
draw_tile,
y="meta_loc_r",
x="meta_loc_c",
channel=1,
hue="meta_out_border",
s=6,
edgecolors="none",
)
for ax in grid.axes.ravel():
ax.set_axis_off()
# siglec8 - RPe
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)
grid = seaborn.FacetGrid(
data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True
)
grid.map_dataframe(
draw_tile,
y="meta_loc_r",
x="meta_loc_c",
channel=2,
hue="meta_out_border",
s=6,
edgecolors="none",
)
for ax in grid.axes.ravel():
ax.set_axis_off()
# CD15 - APC
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)
grid = seaborn.FacetGrid(
data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True
)
grid.map_dataframe(
draw_tile,
y="meta_loc_r",
x="meta_loc_c",
channel=3,
hue="meta_out_border",
s=6,
edgecolors="none",
)
for ax in grid.axes.ravel():
ax.set_axis_off()
df = df[df["meta_out_border"]]
df.shape
(35924, 1191)
df = df[df["meta_regions_DAPI"] > 0]
df.shape
(31984, 1191)
df = df[
(df["meta_regions_PGC"] > 0)
& (df["meta_regions_Bright"] > 0)
& (df["meta_regions_Oblique"] > 0)
]
df.shape
(30603, 1191)
def get_gate_czi(sel, df, maxn=200, sort=None, channels=[0]):
df = df.loc[sel]
if len(df) > maxn:
df = df.sample(n=maxn)
if sort is not None:
df = df.sort_values(by=sort)
out = []
for path, gdf in df.groupby(["meta_path"]):
ai = AICSImage(path, reconstruct_mosaic=False)
for scene, gdf2 in gdf.groupby(["meta_scene"]):
ai.set_scene(scene)
for tile, gdf3 in gdf2.groupby(["meta_tile"]):
print(tile, scene, path)
for (idx, r) in gdf3.iterrows():
pixels = ai.get_image_data("CXY", Z=0, T=0, C=channels, M=tile)
minr, minc, maxr, maxc = (
int(r["meta_bbox_minr"]),
int(r["meta_bbox_minc"]),
int(r["meta_bbox_maxr"]),
int(r["meta_bbox_maxc"]),
)
out.append(pixels[:, minr:maxr, minc:maxc])
return out
aspect_ratio = (
df["feat_major_axis_length_Bright"] / df["feat_minor_axis_length_Bright"]
)
sel1 = aspect_ratio > 2.5
out1 = get_gate_czi(sel1, df, maxn=4, channels=[0, 4])
0 P15-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P15-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P22-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P3-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
sel2 = aspect_ratio < 1.25
out2 = get_gate_czi(sel2, df, maxn=4, channels=[0, 4])
0 P10-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P13-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P23-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P4-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
from matplotlib.path import Path as mPath
import matplotlib.patches as patches
c = 1
grid = seaborn.displot(data=aspect_ratio)
# grid.ax.axvline(1.8, c="black")
path = mPath(
[[1.8, 0],
[1.8, 100],
[4.5, 200],
[4.5, 1400]],
[mPath.MOVETO, mPath.LINETO, mPath.LINETO, mPath.LINETO]
)
grid.ax.add_patch(patches.PathPatch(path, facecolor="none", lw=1.5))
grid.ax.set_xlabel("major axis / minor axis (brightfield)")
ax = grid.fig.add_axes([0.65, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[0][c], origin="lower")
ax = grid.fig.add_axes([0.65, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[1][c], origin="lower")
ax = grid.fig.add_axes([0.65, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[2][c], origin="lower")
ax = grid.fig.add_axes([0.35, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[0][c], origin="lower")
ax = grid.fig.add_axes([0.35, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[1][c], origin="lower")
ax = grid.fig.add_axes([0.35, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[2][c], origin="lower")
plt.savefig(output_aspect, bbox_inches='tight', pad_inches=0)
sel1 = df.feat_eccentricity_Bright > .8
out1 = get_gate_czi(sel1, df, maxn=4, channels=[0, 4])
0 P2-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P21-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P22-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P24-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
sel2 = df.feat_eccentricity_Bright < 0.5
out2 = get_gate_czi(sel2, df, maxn=4, channels=[0, 4])
0 P11-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P19-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P22-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P7-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
c = 1
grid = seaborn.displot(data=df.feat_eccentricity_combined)
grid.ax.axvline(.8, c="black")
grid.ax.set_xlabel("eccentricity (brightfield)")
ax = grid.fig.add_axes([0.9, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[0][c], origin="lower")
ax = grid.fig.add_axes([0.9, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[1][c], origin="lower")
ax = grid.fig.add_axes([0.9, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[2][c], origin="lower")
ax = grid.fig.add_axes([0.5, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[0][c], origin="lower")
ax = grid.fig.add_axes([0.5, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[1][c], origin="lower")
ax = grid.fig.add_axes([0.5, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[2][c], origin="lower")
plt.savefig(output_eccentricity, bbox_inches='tight', pad_inches=0)
aspect_ratio = (
df["feat_major_axis_length_combined"] / df["feat_minor_axis_length_combined"]
)
sel1 = aspect_ratio > 1.5 * df.feat_eccentricity_combined + 0.7
sel2 = df.feat_eccentricity_combined > 0.1
sel3 = df.feat_eccentricity_combined < 0.5
sel4 = aspect_ratio > 1.05
sel5 = df.feat_eccentricity_combined > 0.8
sel6 = aspect_ratio > 1.8
gate1 = get_gate_czi(
sel1 & sel2 & sel3 & sel4,
df,
sort="feat_eccentricity_combined",
maxn=3,
channels=[4, 0],
)
gate2 = get_gate_czi(
sel5 | sel6, df, sort="feat_eccentricity_combined", maxn=3, channels=[4, 0]
)
0 P10-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P13-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P9-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P12-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P14-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi 0 P24-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
fig, ax = plt.subplots(dpi=150)
seaborn.scatterplot(
data=df,
x="feat_eccentricity_combined",
y=aspect_ratio,
hue=(sel1 & sel2 & sel3 & sel4) | sel5 | sel6,
s=5,
edgecolors="none",
ax=ax,
legend=False,
)
for i, im in enumerate(gate1):
tmp_ax = fig.add_axes([0.2 + i * 0.05, 0.3 + i * 0.17, 0.15, 0.15], zorder=1)
tmp_ax.imshow(im[0])
tmp_ax.set_axis_off()
for i, im in enumerate(gate2):
tmp_ax = fig.add_axes([0.55 + i * 0.05, 0.35 + i * 0.17, 0.15, 0.15], zorder=1)
tmp_ax.imshow(im[0])
tmp_ax.set_axis_off()
ax.set_ylabel("Aspect ratio (major / minor axis length)")
ax.set_xlabel("Eccentricity")
seaborn.despine(fig)
# plt.savefig(output_ecc_vs_aspect, bbox_inches='tight', pad_inches=0, dpi=200)
plot_gate_czi(sel1 & sel2 & sel3 & sel4, df, maxn=5, channels=[4, 0])
df = df[~((sel1 & sel2 & sel3 & sel4) | sel5 | sel6)]
df.shape
(30045, 1191)
aspect_ratio = df["feat_major_axis_length_DAPI"] / df["feat_minor_axis_length_DAPI"]
seaborn.displot(data=aspect_ratio)
<seaborn.axisgrid.FacetGrid at 0x7fa9d57af910>
sel1 = aspect_ratio > 3
plot_gate_czi(sel1, df, maxn=10, channels=[4, 0])
0 P10-D5 0 P13-D3 0 P14-D3 0 P14-D5 0 P18-D5 0 P21-D3 0 P4-D4 0 P6-D2 0 P8-D3 0 P8-D4
df = df[~sel1]
df.shape
(30002, 1191)
sel1 = aspect_ratio > 2
plot_gate_czi(sel1, df, maxn=10, channels=[4, 0])
df["feat_glcm_mean_contrast_3_Bright"].plot.hist(bins=100)
sel1 = df["feat_glcm_mean_contrast_3_Bright"] > 9
plot_gate_czi(sel1, df, channel=4, sort="feat_glcm_mean_contrast_3_Bright")
sel1 = df["feat_glcm_mean_contrast_3_Bright"] < 2
plot_gate_czi(sel1, df, maxn=20, channel=4, sort="feat_glcm_mean_contrast_3_Bright")
df["feat_glcm_mean_homogeneity_3_DAPI"].hist(bins=50)
sel1 = df["feat_glcm_mean_homogeneity_3_DAPI"] < 0.3
plot_gate_czi(sel1, df, maxn=15, channels=[4, 0])
0 P1-D3 0 P12-D2 0 P15-D5 0 P17-D2 0 P17-D4 0 P18-D2 0 P18-D4 0 P19-D1 0 P19-D5 0 P23-D1 0 P5-D4 0 P6-D1 0 P6-D3 0 P8-D5
df = df[~sel1]
df.shape
(29987, 1191)
sel1 = df["feat_glcm_mean_homogeneity_3_DAPI"] > 0.7
plot_gate_czi(sel1, df, maxn=15, channel=0)
df["feat_glcm_mean_contrast_3_DAPI"].plot.hist(bins=100)
sel1 = df["feat_glcm_mean_contrast_3_DAPI"] > 10
plot_gate_czi(sel1, df, maxn=15, channel=0)
sel1 = df["feat_glcm_mean_contrast_3_DAPI"] < 1.5
plot_gate_czi(sel1, df, maxn=15, channel=0)
def map_names(a):
return {
"feat_sum_DAPI": "DAPI",
"feat_sum_EGFP": "CD45",
"feat_sum_RPe": "Siglec 8",
"feat_sum_APC": "CD15",
}[a]
melted_df = pandas.melt(
df.reset_index(),
id_vars=["meta_P", "meta_replicate"],
value_vars=df.filter(regex="feat_sum_(DAPI|EGFP|RPe|APC)").columns,
)
melted_df.variable = melted_df.variable.apply(map_names)
grid = seaborn.FacetGrid(
data=melted_df,
col="meta_replicate",
row="variable",
sharey=False,
aspect=1.5,
margin_titles=True,
)
grid.map_dataframe(seaborn.stripplot, x="meta_P", y="value", size=1, alpha=0.5)
grid.set_axis_labels("Well image position", "Fluorescence intensity")
grid.set_titles(col_template="Replicate {col_name}", row_template="{row_name}")
grid.add_legend()
# plt.savefig(data_dir / "figures/qc_intensity_distribution_pre.pdf", bbox_inches='tight', pad_inches=0)
<seaborn.axisgrid.FacetGrid at 0x7fa84d61dfd0>
Below are the DAPI intensities. Image positions 4, 9, 10, 14 and 15 of replicate 3 and 20 of replicate 1 have clearly elevated signals. It can be seen on the overview of image at the top of this notebook that the overal image is a bit brighter. This should be fixable with a min max normalization.
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_sum_DAPI", x="meta_P", s=1, alpha=0.7)
<seaborn.axisgrid.FacetGrid at 0x7fa856274f70>
Here are the cell areas. Image position 6 of replicate 3 shows some problems. When looking at the segmentation positions in the overview image, we can see that the segmentation seems to have underestimated the size of the cells splitting some nuclei in two. It seems best to drop the data from this image position.
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_area_DAPI", x="meta_P", s=1, alpha=0.7)
<seaborn.axisgrid.FacetGrid at 0x7fa8553d9520>
grid = seaborn.FacetGrid(
data=df.groupby(["meta_replicate", "meta_P"])["feat_sum_DAPI"]
.transform(scale)
.reset_index(),
col="meta_replicate",
)
grid.map_dataframe(seaborn.stripplot, y="feat_sum_DAPI", x="meta_P", s=1, alpha=0.7)
<seaborn.axisgrid.FacetGrid at 0x7fa85556e910>
scale_dapi = df.groupby(["meta_replicate", "meta_P"])["feat_sum_DAPI"].transform(scale)
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa85665f0d0>
sel1 = df["feat_combined_sum_APC"] > 0.5e7
plot_gate_czi(sel1, df, channels=[4, 3])
0 P1-D4 0 P10-D3 0 P14-D3 0 P14-D5 0 P15-D5 0 P2-D3 0 P20-D1 0 P20-D3 0 P25-D1 0 P5-D2 0 P6-D2 0 P7-D1 0 P7-D4 0 P7-D5
sel1.sum()
25
df = df[~sel1]
df.shape
(29962, 1191)
asinh_apc = flowutils.transforms.asinh(
df["feat_combined_sum_APC"], channel_indices=None, t=4e6, m=4, a=1
)
grid = seaborn.FacetGrid(data=asinh_apc.reset_index(), col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.5
)
<seaborn.axisgrid.FacetGrid at 0x7fa8537e9eb0>
def asinh_scale(x, t):
return scale(
flowutils.transforms.asinh(x, channel_indices=None, t=t, m=4.5, a=1),
with_std=False,
)
asinh_scale_apc = (
df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_APC"]
.transform(lambda x: asinh_scale(x, df["feat_combined_sum_APC"].max()))
.reset_index()
)
grid = seaborn.FacetGrid(data=asinh_scale_apc, col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa854224cd0>
seaborn.displot(data=asinh_scale_apc, x="feat_combined_sum_APC", hue="meta_replicate")
<seaborn.axisgrid.FacetGrid at 0x7fa854cb7940>
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa747add6a0>
sel1 = df["feat_combined_sum_RPe"] > 0.3e7
plot_gate_czi(sel1, df, channels=[4, 2])
0 P25-D1
sel1.sum()
3
df = df[~sel1]
df.shape
(29959, 1191)
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa777c33970>
asinh_scale_rpe = (
df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_RPe"]
.transform(lambda x: asinh_scale(x, df["feat_combined_sum_RPe"].max()))
.reset_index()
)
grid = seaborn.FacetGrid(data=asinh_scale_rpe, col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa854cb4490>
seaborn.displot(data=asinh_scale_rpe, x="feat_combined_sum_RPe", hue="meta_replicate")
<seaborn.axisgrid.FacetGrid at 0x7fa854631940>
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_EGFP", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa7470f2160>
asinh_scale_egfp = (
df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_EGFP"]
.transform(lambda x: asinh_scale(x, df["feat_combined_sum_EGFP"].max()))
.reset_index()
)
grid = seaborn.FacetGrid(data=asinh_scale_egfp, col="meta_replicate")
grid.map_dataframe(
seaborn.stripplot, y="feat_combined_sum_EGFP", x="meta_P", s=1, alpha=0.7
)
<seaborn.axisgrid.FacetGrid at 0x7fa854ecfc10>
df.shape
(29959, 1191)
numpy.save(output_index, df.index)
numpy.save(output_columns, df.columns)